{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We have already seen how the RAG is implemented in data.\n",
    "In this note, we will focus on making each component more configurable,\n",
    "especially the data processing pipeline. This will help us run experiments and see how useful these components are in benchmarking."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# data_processing config\n",
    "# TODO: fill in the data-processing settings. The dict literal was left\n",
    "# unterminated (a SyntaxError); it is closed and left empty here so that this\n",
    "# cell parses and the notebook survives Restart Kernel -> Run All.\n",
    "config = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# the data pipeline and the backend data processing\n",
    "from lightrag.core.embedder import Embedder \n",
    "from lightrag.core.types import ModelClientType\n",
    "from lightrag.components.data_process import DocumentSplitter, ToEmbeddings\n",
    "from lightrag.core.component import Sequential\n",
    "\n",
    "def prepare_data_pipeline():\n",
    "    \"\"\"Build the data-processing pipeline: a document splitter followed by an embedder.\n",
    "\n",
    "    The assembled pipeline is printed for inspection and returned so that\n",
    "    callers can actually use it (previously it was built and then discarded).\n",
    "\n",
    "    Returns:\n",
    "        Sequential: ``DocumentSplitter`` chained with ``ToEmbeddings``.\n",
    "    \"\"\"\n",
    "    # Keyword arguments handed to the embedder as its model_kwargs.\n",
    "    model_kwargs = {\n",
    "        \"model\": \"text-embedding-3-small\",\n",
    "        \"dimensions\": 256,\n",
    "        \"encoding_format\": \"float\",\n",
    "    }\n",
    "\n",
    "    # Keyword arguments for DocumentSplitter.\n",
    "    # NOTE(review): presumably 50-word chunks with a 10-word overlap -- confirm\n",
    "    # against the DocumentSplitter documentation.\n",
    "    splitter_config = {\n",
    "        \"split_by\": \"word\",\n",
    "        \"split_length\": 50,\n",
    "        \"split_overlap\": 10,\n",
    "    }\n",
    "\n",
    "    splitter = DocumentSplitter(**splitter_config)\n",
    "    embedder = Embedder(model_client=ModelClientType.OPENAI(), model_kwargs=model_kwargs)\n",
    "    embedder_transformer = ToEmbeddings(embedder, batch_size=2)\n",
    "\n",
    "    # Order matters: the splitter is listed first, the embedding step second.\n",
    "    data_transformer = Sequential(splitter, embedder_transformer)\n",
    "    print(data_transformer)\n",
    "    return data_transformer"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "lightrag-project",
   "language": "python",
   "name": "light-rag-project"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
